reqpkg <- c("ggplot2","plotly","DESeq2","magrittr","dplyr","genefilter","AnnotationHub","org.Mm.eg.db","GO.db","vsn","pheatmap","biomaRt","curl","RColorBrewer","viridis","fgsea","tidyverse","DT","ggpubr")
for (i in reqpkg) {
  suppressWarnings(suppressMessages(library(i, quietly=T, verbose=T, warn.conflicts=F,character.only=T)))
  print(i)
}
## [1] "ggplot2"
## [1] "plotly"
## [1] "DESeq2"
## [1] "magrittr"
## [1] "dplyr"
## [1] "genefilter"
## [1] "AnnotationHub"
## [1] "org.Mm.eg.db"
## [1] "GO.db"
## [1] "vsn"
## [1] "pheatmap"
## [1] "biomaRt"
## [1] "curl"
## [1] "RColorBrewer"
## [1] "viridis"
## [1] "fgsea"
## [1] "tidyverse"
## [1] "DT"
## [1] "ggpubr"
theme_set(theme_pubr())
select <- dplyr::select
ensembl <- useMart("ensembl")
ensemblMouse <- useDataset("mmusculus_gene_ensembl", mart=ensembl)
mouseProteinCodingGenes <- getBM(
  attributes=c("ensembl_gene_id","external_gene_name","description"), 
  filters='biotype', 
  values=c('protein_coding'), 
  mart=ensemblMouse)

DESeq

df <- read.csv("combatSeq_corrected_data.csv") %>%
  .[rowSums(sapply(., '%in%', mouseProteinCodingGenes$external_gene_name)) > 0,] %>% 
  set_rownames(.$X) %>% 
  select(-X)

sex_list <- c(rep("female", 16), rep("male", 16))
cond_list <- rep(c(rep("12wConv", 4), rep("4wConv", 4), rep("GF", 4), rep("SPF", 4)), 2)
condition_list <- data.frame(row.names=colnames(df), Sex=sex_list, Condition=cond_list)

condition_list$Sex <- relevel(condition_list$Sex, "female")
condition_list$Condition <- relevel(condition_list$Condition, "SPF")

dds <- DESeq2::DESeqDataSetFromMatrix(countData=df,
                                      colData = condition_list,
                                      design = ~ Sex + Condition + Sex:Condition)
head(dds)
## class: DESeqDataSet 
## dim: 6 32 
## metadata(1): version
## assays(1): counts
## rownames(6): Rp1 Sox17 ... Gm37988 Tcea1
## rowData names(0):
## colnames(32): MF.20 MF.24 ... MF.11 MF.12
## colData names(2): Sex Condition
dds <- estimateSizeFactors(dds)
colData(dds)
## DataFrame with 32 rows and 3 columns
##            Sex Condition        sizeFactor
##       <factor>  <factor>         <numeric>
## MF.20   female   12wConv  1.08051709610664
## MF.24   female   12wConv  1.15107649494338
## MF.1    female   12wConv 0.946422330177653
## MF.3    female   12wConv 0.932181787801605
## MF.19   female    4wConv  1.10847475975552
## ...        ...       ...               ...
## MF.9      male        GF 0.924664868739107
## MF.27     male       SPF    1.104719567798
## MF.28     male       SPF  1.08550756866473
## MF.11     male       SPF 0.971434598105476
## MF.12     male       SPF 0.921528466633396
dds$Group <- factor(paste0(dds$Sex, dds$Condition), levels = c("femaleSPF", "female12wConv", "female4wConv", "femaleGF", "maleSPF", "male12wConv", "male4wConv", "maleGF"))
design(dds) <- ~  Group
vsd <- vst(dds, blind = TRUE)
head(assay(vsd), 3)
##           MF.20     MF.24      MF.1      MF.3     MF.19     MF.21      MF.2
## Rp1    7.422555  7.680962  7.547485  7.549269  7.672037  7.676788  7.766229
## Sox17  9.069960  8.785837  8.699090  8.955951  8.563502  8.671979  8.832556
## Mrpl15 9.981710 10.275255 10.127250 10.101192 10.057358 10.175637 10.061974
##             MF.4     MF.29     MF.30     MF.13     MF.14     MF.31     MF.32
## Rp1     7.755435  7.312500  7.689598  7.312500  7.580634  7.565477  7.506628
## Sox17   8.575799  9.062377  8.910226  8.818694  8.826154  8.573463  8.552819
## Mrpl15 10.127827 10.252663 10.409074 10.917596 10.180764 10.092134 10.227681
##            MF.15     MF.16    MF.22     MF.23      MF.6      MF.8     MF.17
## Rp1     7.482708  7.430595 7.620154  7.684179  7.796634  7.574916  7.533806
## Sox17   8.628823  8.647058 8.619706  8.794454  8.821164  8.756439  8.775720
## Mrpl15 10.081052 10.095532 9.945673 10.196460 10.053388 10.025887 10.179902
##            MF.18      MF.5      MF.7     MF.25     MF.26     MF.10      MF.9
## Rp1     7.687536  7.774401  7.598828  7.556825  7.721238  7.553993  7.648320
## Sox17   8.877565  8.525481  8.814587  8.790266  9.079339  8.802868  8.821464
## Mrpl15 10.095672 10.035899 10.079328 10.066285 10.108856 10.060285 10.111368
##           MF.27     MF.28     MF.11     MF.12
## Rp1    7.655965  7.763498  7.678652  7.578663
## Sox17  8.691374  8.554596  8.908309  8.939845
## Mrpl15 9.911917 10.277160 10.167866 10.129378
colData(vsd)
## DataFrame with 32 rows and 4 columns
##            Sex Condition        sizeFactor         Group
##       <factor>  <factor>         <numeric>      <factor>
## MF.20   female   12wConv  1.08051709610664 female12wConv
## MF.24   female   12wConv  1.15107649494338 female12wConv
## MF.1    female   12wConv 0.946422330177653 female12wConv
## MF.3    female   12wConv 0.932181787801605 female12wConv
## MF.19   female    4wConv  1.10847475975552  female4wConv
## ...        ...       ...               ...           ...
## MF.9      male        GF 0.924664868739107        maleGF
## MF.27     male       SPF    1.104719567798       maleSPF
## MF.28     male       SPF  1.08550756866473       maleSPF
## MF.11     male       SPF 0.971434598105476       maleSPF
## MF.12     male       SPF 0.921528466633396       maleSPF
rld <- rlog(dds, blind=FALSE)
## rlog() may take a few minutes with 30 or more samples,
## vst() is a much faster transformation
ntd <- normTransform(dds)
meanSdPlot(assay(ntd))

meanSdPlot(assay(vsd))

meanSdPlot(assay(rld))

vst(dds[,1:16]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, female only")

vst(dds[,17:32]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, male only")

plotPCA(vsd, intgroup=c("Sex","Condition")) + ggtitle("vst")

plotPCA(vsd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1)

plotPCA(ntd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("nld")

plotPCA(rld, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("rld")

dds <- DESeq(dds)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#Compare SPF v GF in F
res_1 <- results(dds,contrast=c("Group", "femaleSPF", "femaleGF"), tidy = TRUE)
#Compare 4w vs 12w in F
res_2 <- results(dds, contrast=c("Group", "female4wConv", "female12wConv"), tidy = TRUE)
#Compare SPF vs 4w in F
res_3 <- results(dds, contrast=c("Group", "femaleSPF", "female4wConv"), tidy = TRUE)
#Compare SPF vs 12w in F
res_4 <- results(dds, contrast=c("Group", "femaleSPF", "female12wConv"), tidy = TRUE)
#Compare SPF v GF in M
res_5 <- results(dds,contrast=c("Group", "maleSPF", "maleGF"), tidy = TRUE)
#Compare 4w vs 12w in M
res_6 <- results(dds, contrast=c("Group", "male4wConv", "male12wConv"), tidy = TRUE)
#Compare SPF vs 4w in M
res_7 <- results(dds, contrast=c("Group", "maleSPF", "male4wConv"), tidy = TRUE)
#Compare SPF vs 12w in M
res_8 <- results(dds, contrast=c("Group", "maleSPF", "male12wConv"), tidy = TRUE)
sum(res_1$pvalue < 0.05, na.rm=TRUE)
## [1] 5656
sum(!is.na(res_1$pvalue))
## [1] 18742
sum(res_1$padj < 0.1, na.rm=TRUE)
## [1] 4795
sum(res_2$pvalue < 0.05, na.rm=TRUE)
## [1] 386
sum(!is.na(res_2$pvalue))
## [1] 18742
sum(res_2$padj < 0.1, na.rm=TRUE)
## [1] 4
sum(res_3$pvalue < 0.05, na.rm=TRUE)
## [1] 2676
sum(!is.na(res_3$pvalue))
## [1] 18742
sum(res_3$padj < 0.1, na.rm=TRUE)
## [1] 1068
sum(res_4$pvalue < 0.05, na.rm=TRUE)
## [1] 2501
sum(!is.na(res_4$pvalue))
## [1] 18742
sum(res_4$padj < 0.1, na.rm=TRUE)
## [1] 1028
sum(res_5$pvalue < 0.05, na.rm=TRUE)
## [1] 2287
sum(!is.na(res_5$pvalue))
## [1] 18742
sum(res_5$padj < 0.1, na.rm=TRUE)
## [1] 674
sum(res_6$pvalue < 0.05, na.rm=TRUE)
## [1] 875
sum(!is.na(res_6$pvalue))
## [1] 18742
sum(res_6$padj < 0.1, na.rm=TRUE)
## [1] 0
sum(res_7$pvalue < 0.05, na.rm=TRUE)
## [1] 1401
sum(!is.na(res_7$pvalue))
## [1] 18742
sum(res_7$padj < 0.1, na.rm=TRUE)
## [1] 168
sum(res_8$pvalue < 0.05, na.rm=TRUE)
## [1] 1738
sum(!is.na(res_8$pvalue))
## [1] 18742
sum(res_8$padj < 0.1, na.rm=TRUE)
## [1] 305
resultsNames(dds)
## [1] "Intercept"                        "Group_female12wConv_vs_femaleSPF"
## [3] "Group_female4wConv_vs_femaleSPF"  "Group_femaleGF_vs_femaleSPF"     
## [5] "Group_maleSPF_vs_femaleSPF"       "Group_male12wConv_vs_femaleSPF"  
## [7] "Group_male4wConv_vs_femaleSPF"    "Group_maleGF_vs_femaleSPF"
condition_LFC <- lfcShrink(dds, coef = "Group_female12wConv_vs_femaleSPF", type = 'apeglm')
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(condition_LFC, ylim=c(-2,2))

topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 50)
mat  <- assay(vsd)[ topVarGenes, ]
mat  <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c("Sex","Condition")])
pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno, cluster_cols = F)

pheatmap(mat, annotation_col = anno)

pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno)

fGSEA

pathway_folder <- 'pathways/'

fgsea_output <- list()
ranks <- list()

mart <- useDataset("mmusculus_gene_ensembl", mart=useMart("ensembl"))
bm <- getBM(attributes=c("external_gene_name", "hsapiens_homolog_associated_gene_name"), mart=mart) %>%
  distinct() %>%
  as_tibble() %>%
  na_if("") %>%
  na.omit()

# ens2symbol <- AnnotationDbi::select(org.Mm.eg.db,
# key=WT_res$row,
# columns="SYMBOL",
# keytype="ENSEMBL")
# ens2symbol <- as_tibble(ens2symbol)
# WT_res <- inner_join(WT_res, bm, by=c("row"="ensembl_gene_id"))

fGSEA_pipe <- function(res, path, title, pres=FALSE) {
  res2 <- inner_join(res_1, bm, by=c("row"="external_gene_name")) %>%
    select(hsapiens_homolog_associated_gene_name, stat) %>%
    na.omit() %>%
    distinct() %>%
    group_by(hsapiens_homolog_associated_gene_name) %>%
    summarize(stat=mean(stat))
  
  ranks <- deframe(res2)
  if(pres) head(ranks, 20)
  
  if(path == "hallmark") p <- "h.all.v7.0.symbols.gmt"
  else if(path == "KEGG") p <- "c2.cp.kegg.v7.0.symbols.gmt"
  else if(path == "mir") p <- "c3.mir.v7.0.symbols.gmt"
  else if(path == "GO_all") p <- "c5.all.v7.0.symbols.gmt"
  else if(path == "GO_BP") p <- "c5.bp.v7.0.symbols.gmt"
  
  fgseaResTidy <- fgsea(pathways=gmtPathways(paste0(pathway_folder, p)), stats=ranks, nperm=100000) %>%
    as_tibble() %>%
    filter(padj < 0.05) %>%
    arrange(desc(NES))
  
  p <- ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
    geom_col() +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title=paste0(title, ": ", path, " pathways NES from GSEA")) +
    theme_minimal()
  return(p)
}
fGSEA_pipe(res_1, "hallmark", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_1, "KEGG", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_1, "GO_BP", "Female SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_2, "hallmark", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_2, "KEGG", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_2, "GO_BP", "Female 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_3, "hallmark", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_3, "KEGG", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_3, "GO_BP", "Female SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_4, "hallmark", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_4, "KEGG", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_4, "GO_BP", "Female SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_5, "hallmark", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_5, "KEGG", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_5, "GO_BP", "Male SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_6, "hallmark", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_6, "KEGG", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_6, "GO_BP", "Male 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_7, "hallmark", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_7, "KEGG", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_7, "GO_BP", "Male SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_8, "hallmark", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_8, "KEGG", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_8, "GO_BP", "Male SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
 

By Sumeed Yoyo Manzoor

smanzoor@uchicago.edu